1.Obtain the Data¶

In [ ]:
from tensorflow import keras
from tensorflow.keras import layers
import pathlib
from tensorflow.keras.utils import image_dataset_from_directory
from pathlib import Path
from glob import glob
In [ ]:
data_dir = Path("C:/Users/Varshan/CSCN8010/data/BIRDS_525_SPECIES")

# Specify the dataset split (train, test, or valid)
split = "train"

# Get the list of class folders in the specified split
class_folders = list(data_dir.joinpath(split).glob("*"))

# Dictionary to store class names and their corresponding image counts
class_image_counts = {}

# Iterate through class folders and count images
for class_folder in class_folders:
    class_name = class_folder.name
    image_count = len(list(class_folder.glob("*.jpg")))  # Assuming images are in JPG format
    class_image_counts[class_name] = image_count

# Sort the classes based on image counts in descending order
sorted_classes = sorted(class_image_counts.items(), key=lambda x: x[1], reverse=True)

# Get the top three classes
top_three_classes = sorted_classes[:3]

# Print the top three classes and their image counts
for class_name, image_count in top_three_classes:
    print(f"Class: {class_name}, Image Count: {image_count}")
Class: RUFOUS TREPE, Image Count: 263
Class: HOUSE FINCH, Image Count: 248
Class: D-ARNAUDS BARBET, Image Count: 233
In [ ]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import matplotlib.pyplot as plt
import numpy as np
import os
In [ ]:
# Visualize some images from each of the top three classes
for class_name in [class_name for class_name, _ in top_three_classes]:
    class_folder = os.path.join(data_dir, "train", class_name)
    image_paths = glob(os.path.join(class_folder, "*.jpg"))
    
    # Display the first few images from each class
    num_images_to_display = 3
    for i in range(min(num_images_to_display, len(image_paths))):
        image_path = image_paths[i]
        img = load_img(image_path)
        img_array = img_to_array(img)
        
        # Display the image
        plt.figure(figsize=(2, 2))
        plt.imshow(img_array.astype(np.uint8))
        plt.title(f"Class: {class_name}")
        plt.axis('off')
        plt.show()

2.Use data-augmentation to increase the number of training images. You are encouraged to try out various augmentation methods supported by Keras.¶

In [ ]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import load_img, img_to_array, ImageDataGenerator
In [ ]:
# Top 3 classes
top_three_classes = ["RUFOUS TREPE", "HOUSE FINCH", "D-ARNAUDS BARBET"]

# ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

test_datagen = ImageDataGenerator(rescale=1./255)

# Flow training images in batches using the generators
train_generator = train_datagen.flow_from_directory(
    os.path.join(data_dir, 'train'),
    target_size=(224, 224),  
    batch_size=32,
    class_mode='categorical',
    classes=top_three_classes
)

validation_generator = test_datagen.flow_from_directory(
    os.path.join(data_dir, 'valid'),
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    classes=top_three_classes
)

test_generator = test_datagen.flow_from_directory(
    os.path.join(data_dir, 'test'),
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical',
    classes=top_three_classes,
    shuffle=False  
)
Found 744 images belonging to 3 classes.
Found 15 images belonging to 3 classes.
Found 15 images belonging to 3 classes.
In [ ]:
# Display augmented images for each class
for class_name in top_three_classes:
    # Create a directory to save augmented images for each class
    save_dir = Path(f"C:/Users/Varshan/CSCN8010/augmented_images/{class_name}")
    save_dir.mkdir(parents=True, exist_ok=True)

    # Get a sample image path for the specified class
    class_folder = Path(data_dir) / 'train' / class_name
    image_paths = list(class_folder.glob("*.jpg"))
    sample_image_path = image_paths[0]

    # Load the sample image
    img = load_img(sample_image_path)
    img_array = img_to_array(img)
    img_array = img_array.reshape((1,) + img_array.shape)

    # Generate and save 9 augmented images for each class
    num_augmented_images = 9
    augmentation_flow = train_datagen.flow(img_array, batch_size=1, save_to_dir=save_dir, save_prefix="aug", save_format="jpg")

    for i in range(num_augmented_images):
        augmented_img = augmentation_flow.next()

    # Display a few augmented images for each class
    augmented_files = list(save_dir.glob("*.jpg"))
    plt.figure(figsize=(15, 15))

    for i in range(min(9, len(augmented_files))):
        augmented_img_path = augmented_files[i]
        augmented_img = load_img(augmented_img_path)
        augmented_img_array = img_to_array(augmented_img)

        plt.subplot(3, 3, i + 1)
        plt.imshow(augmented_img_array.astype(np.uint8))
        plt.title(f'Class: {class_name}\nAugmented Image {i + 1}')
        plt.axis('off')

    plt.show()

3.Fine-Tune VGG16 (pre-trained on imagenet), to classify the 3 classes (2 points)¶

In [ ]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D
In [ ]:
# Load pre-trained VGG16 model
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
In [ ]:
base_model.summary()
Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0         
                                                                 
 block3_conv1 (Conv2D)       (None, 56, 56, 256)       295168    
                                                                 
 block3_conv2 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_conv3 (Conv2D)       (None, 56, 56, 256)       590080    
                                                                 
 block3_pool (MaxPooling2D)  (None, 28, 28, 256)       0         
                                                                 
 block4_conv1 (Conv2D)       (None, 28, 28, 512)       1180160   
                                                                 
 block4_conv2 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_conv3 (Conv2D)       (None, 28, 28, 512)       2359808   
                                                                 
 block4_pool (MaxPooling2D)  (None, 14, 14, 512)       0         
                                                                 
 block5_conv1 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv2 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_conv3 (Conv2D)       (None, 14, 14, 512)       2359808   
                                                                 
 block5_pool (MaxPooling2D)  (None, 7, 7, 512)         0         
                                                                 
=================================================================
Total params: 14,714,688
Trainable params: 14,714,688
Non-trainable params: 0
_________________________________________________________________
In [ ]:
# Freeze the layers of the pre-trained model
for layer in base_model.layers:
    layer.trainable = False

Here I implemented Freezing layers, which involves setting the trainable property of the layers to False, preventing the weights of those layers from being updated during the training process. This is often done to keep the pre-trained features fixed while training additional layers on top. This is particularly useful when you have limited labeled data for your specific task, and want to leverage the knowledge captured by the pre-trained model.¶

In [ ]:
# Get one batch of data from the training generator
batch_features, batch_labels = next(train_generator)

# Print the shape of the features and labels
print("Batch Features Shape:", batch_features.shape)
print("Batch Labels Shape:", batch_labels.shape)
Batch Features Shape: (32, 224, 224, 3)
Batch Labels Shape: (32, 3)
In [ ]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
In [ ]:
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))  
model.add(Dense(3, activation='softmax'))


# Compile the model
model.compile(loss="categorical_crossentropy", optimizer=RMSprop(), metrics=["accuracy"])

# Define ModelCheckpoint callback
callbacks = [
    ModelCheckpoint(filepath="./models/feature_extraction.keras", save_best_only=True, monitor="val_loss")
]

# Train the model
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    callbacks=callbacks
)

# Access training and validation accuracy and loss values
training_loss = history.history['loss']
training_accuracy = history.history['accuracy']
validation_loss = history.history['val_loss']
validation_accuracy = history.history['val_accuracy']
Epoch 1/20
24/24 [==============================] - 21s 843ms/step - loss: 7.4933 - accuracy: 0.3737 - val_loss: 1.0599 - val_accuracy: 0.5333
Epoch 2/20
24/24 [==============================] - 24s 993ms/step - loss: 1.1059 - accuracy: 0.4301 - val_loss: 1.0377 - val_accuracy: 0.4000
Epoch 3/20
24/24 [==============================] - 27s 1s/step - loss: 1.0079 - accuracy: 0.5188 - val_loss: 1.2693 - val_accuracy: 0.5333
Epoch 4/20
24/24 [==============================] - 27s 1s/step - loss: 1.0328 - accuracy: 0.5605 - val_loss: 0.9988 - val_accuracy: 0.6000
Epoch 5/20
24/24 [==============================] - 29s 1s/step - loss: 0.9423 - accuracy: 0.5954 - val_loss: 0.9906 - val_accuracy: 0.4667
Epoch 6/20
24/24 [==============================] - 34s 1s/step - loss: 0.9550 - accuracy: 0.6169 - val_loss: 0.7658 - val_accuracy: 0.6667
Epoch 7/20
24/24 [==============================] - 29s 1s/step - loss: 0.8646 - accuracy: 0.6237 - val_loss: 0.6643 - val_accuracy: 0.8667
Epoch 8/20
24/24 [==============================] - 29s 1s/step - loss: 0.8520 - accuracy: 0.6935 - val_loss: 1.2558 - val_accuracy: 0.5333
Epoch 9/20
24/24 [==============================] - 27s 1s/step - loss: 0.7924 - accuracy: 0.7003 - val_loss: 0.8156 - val_accuracy: 0.6667
Epoch 10/20
24/24 [==============================] - 27s 1s/step - loss: 0.7150 - accuracy: 0.7070 - val_loss: 0.4467 - val_accuracy: 0.8000
Epoch 11/20
24/24 [==============================] - 28s 1s/step - loss: 0.7745 - accuracy: 0.7056 - val_loss: 0.6860 - val_accuracy: 0.8667
Epoch 12/20
24/24 [==============================] - 26s 1s/step - loss: 0.6534 - accuracy: 0.7500 - val_loss: 0.6509 - val_accuracy: 0.8667
Epoch 13/20
24/24 [==============================] - 29s 1s/step - loss: 0.6792 - accuracy: 0.7245 - val_loss: 0.4770 - val_accuracy: 0.8000
Epoch 14/20
24/24 [==============================] - 28s 1s/step - loss: 0.6395 - accuracy: 0.7688 - val_loss: 0.6084 - val_accuracy: 0.8000
Epoch 15/20
24/24 [==============================] - 27s 1s/step - loss: 0.6183 - accuracy: 0.7688 - val_loss: 0.5814 - val_accuracy: 0.6667
Epoch 16/20
24/24 [==============================] - 26s 1s/step - loss: 0.5932 - accuracy: 0.7944 - val_loss: 0.4300 - val_accuracy: 0.8000
Epoch 17/20
24/24 [==============================] - 27s 1s/step - loss: 0.5393 - accuracy: 0.8105 - val_loss: 0.6155 - val_accuracy: 0.8000
Epoch 18/20
24/24 [==============================] - 26s 1s/step - loss: 0.6069 - accuracy: 0.7796 - val_loss: 0.7944 - val_accuracy: 0.6000
Epoch 19/20
24/24 [==============================] - 28s 1s/step - loss: 0.5715 - accuracy: 0.8051 - val_loss: 0.4721 - val_accuracy: 0.8000
Epoch 20/20
24/24 [==============================] - 27s 1s/step - loss: 0.5165 - accuracy: 0.8145 - val_loss: 0.3535 - val_accuracy: 0.8667

The training accuracy starts at approximately 37.37% and increases to around 81.45% by the end of 20 epochs.This indicates that the model is learning to make better predictions on the training set.¶

The validation accuracy starts at 53.33% and reaches around 86.67% by the end of 20 epochs.The increasing trend in validation accuracy is a positive sign, suggesting that the model is generalizing well to unseen data.¶

In [ ]:
# Extract training and validation accuracies from the training log
train_accuracy = [0.3737, 0.4301, 0.5188, 0.5605, 0.5954, 0.6169, 0.6237, 0.6935, 0.7003, 0.7070, 0.7056, 0.7500, 0.7245, 0.7688, 0.7688, 0.7944, 0.8105, 0.7796, 0.8051, 0.8145]
val_accuracy = [0.5333, 0.4000, 0.5333, 0.6000, 0.4667, 0.6667, 0.8667, 0.5333, 0.6667, 0.8000, 0.8667, 0.8667, 0.8000, 0.8000, 0.6667, 0.8000, 0.8000, 0.6000, 0.8667, 0.8667]

# Create a plot
epochs = range(1, 21) 
plt.plot(epochs, train_accuracy, label='Training Accuracy')
plt.plot(epochs, val_accuracy, label='Validation Accuracy')

# Add labels and title
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracies over Epochs')

# Add legend
plt.legend()

# Show the plot
plt.show()

The gap between training and validation accuracies has decreased over epochs, which is a positive sign. A large gap might indicate overfitting.¶

In [ ]:
model.summary()
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 32)     0         
 )                                                               
                                                                 
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 222, 222, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 111, 111, 32)     0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 109, 109, 64)      18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 54, 54, 64)       0         
 2D)                                                             
                                                                 
 flatten_6 (Flatten)         (None, 186624)            0         
                                                                 
 dense_11 (Dense)            (None, 128)               23888000  
                                                                 
 dropout_6 (Dropout)         (None, 128)               0         
                                                                 
 dense_12 (Dense)            (None, 3)                 387       
                                                                 
=================================================================
Total params: 23,907,779
Trainable params: 23,907,779
Non-trainable params: 0
_________________________________________________________________

4.Explore the model performance: accuracy, confusion metric, precision, recall, F1-score, precision-recall curve and its area under the curve (AUC). Explore specific examples in which the model failed to predict correctly.¶

In [ ]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_recall_curve, auc
In [ ]:
test_generator.reset()  
In [ ]:
# Make predictions on the test set
y_true = test_generator.classes
y_pred_prob = model.predict(test_generator)
y_pred = np.argmax(y_pred_prob, axis=1)
1/1 [==============================] - 0s 155ms/step
1/1 [==============================] - 0s 155ms/step
In [ ]:
# Accuracy
accuracy = accuracy_score(y_true, y_pred)
print(f'Accuracy: {accuracy}')

# Confusion Matrix
conf_matrix = confusion_matrix(y_true, y_pred)
print('Confusion Matrix:')
print(conf_matrix)

# Classification Report
class_report = classification_report(y_true, y_pred, target_names=top_three_classes)
print('Classification Report:')
print(class_report)
Accuracy: 1.0
Confusion Matrix:
[[5 0 0]
 [0 5 0]
 [0 0 5]]
Classification Report:
                  precision    recall  f1-score   support

    RUFOUS TREPE       1.00      1.00      1.00         5
     HOUSE FINCH       1.00      1.00      1.00         5
D-ARNAUDS BARBET       1.00      1.00      1.00         5

        accuracy                           1.00        15
       macro avg       1.00      1.00      1.00        15
    weighted avg       1.00      1.00      1.00        15

In [ ]:
# Precision-Recall Curve and AUC
precision, recall, _ = precision_recall_curve(y_true, y_pred_prob[:, 1], pos_label=1)
pr_auc = auc(recall, precision)

plt.figure(figsize=(6, 6))
plt.plot(recall, precision, label=f'PR Curve (AUC = {pr_auc:.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision-Recall Curve')
plt.legend()
plt.show()

# Explore misclassifications
misclassified_indices = np.where(y_true != y_pred)[0]

# Display a few examples of misclassifications
for index in misclassified_indices[:5]:
    true_class = top_three_classes[y_true[index]]
    predicted_class = top_three_classes[y_pred[index]]
    print(f'Example {index + 1}: True Class: {true_class}, Predicted Class: {predicted_class}')

The model accuracy is 100% is may be because of two reasons.¶

-It has fewer number of datasets.¶

-Freezing the layers that prevent the weights of those pre-trained layers from being updated during the training process.¶

This means that, based on the evaluation set (test set), the model correctly predicted all instances, achieving 100% accuracy. Every prediction made by the model on the given dataset is correct.¶